Gmail to Vector Embeddings with PGVector and Ollama

工作流概述

这是一个包含20个节点的复杂工作流,主要用于自动化处理各种任务。

工作流源代码

下载
{
  "id": "ZiIoKEClTk83g1Jt",
  "meta": {
    "instanceId": "8a3ba313628b26e4e4cf0504ff23322f235d6b433d92e59bcf8762764730ed80",
    "templateCredsSetupCompleted": true
  },
  "name": "Gmail to Vector Embeddings with PGVector and Ollama",
  "tags": [],
  "nodes": [
    {
      "id": "162b1a8b-2471-4880-9fcb-7f2dcfe175a8",
      "name": "Embeddings Ollama",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOllama",
      "position": [
        1920,
        -100
      ],
      "parameters": {
        "model": "nomic-embed-text:latest"
      },
      "credentials": {},
      "typeVersion": 1
    },
    {
      "id": "49eb04b0-3b54-499c-ba46-3251102a4017",
      "name": "Default Data Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        2040,
        -97.5
      ],
      "parameters": {
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "emails_metadata.id",
                "value": "={{ $('Extract email fields').item.json.email_id }}"
              },
              {
                "name": "emails_metadata.thread_id",
                "value": "={{ $('Extract email fields').item.json.thread_id }}"
              }
            ]
          }
        },
        "jsonData": "={{ $('Extract email fields').item.json.email_text }}",
        "jsonMode": "expressionData"
      },
      "typeVersion": 1
    },
    {
      "id": "b4853472-6ac7-4da5-97b3-b22950ddff06",
      "name": "Recursive Character Text Splitter",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "position": [
        2128,
        100
      ],
      "parameters": {
        "options": {},
        "chunkSize": 2000,
        "chunkOverlap": 50
      },
      "typeVersion": 1
    },
    {
      "id": "b189f134-f78e-438f-9189-2f2b276b487d",
      "name": "Gmail Trigger",
      "type": "n8n-nodes-base.gmailTrigger",
      "position": [
        1260,
        280
      ],
      "parameters": {
        "simple": false,
        "filters": {
          "labelIds": [
            "INBOX"
          ]
        },
        "options": {
          "downloadAttachments": true
        },
        "pollTimes": {
          "item": [
            {
              "mode": "everyMinute"
            }
          ]
        }
      },
      "credentials": {},
      "typeVersion": 1.2
    },
    {
      "id": "81cba4c5-7762-483d-a076-3fa8799f70ce",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        840,
        40
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "f82243ad-6efd-4be2-bf4e-5001870ae854",
      "name": "Split Out",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        640,
        40
      ],
      "parameters": {
        "options": {
          "destinationFieldName": "after"
        },
        "fieldToSplitOut": "weeks"
      },
      "typeVersion": 1
    },
    {
      "id": "2163d5ec-416f-4299-8a9d-10c26eaef32f",
      "name": "Was manually triggered?",
      "type": "n8n-nodes-base.if",
      "position": [
        2416,
        -145
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "3cbc77e7-1796-4e1b-bbff-6391dd131336",
              "operator": {
                "type": "boolean",
                "operation": "false",
                "singleValue": true
              },
              "leftValue": "={{ $('Manual Trigger').isExecuted }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "76557325-c94e-47a9-9384-e6cbea94f67e",
      "name": "Manual Trigger",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        0,
        40
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "b9400906-a458-4305-8805-bb6bea17396b",
      "name": "No Operation, do nothing",
      "type": "n8n-nodes-base.noOp",
      "position": [
        2636,
        -145
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "5f0aa7c2-85b3-4585-8c8c-727af27de61c",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -60,
        -540
      ],
      "parameters": {
        "color": 6,
        "width": 1440,
        "height": 780,
        "content": "## Bulk e-mail import

Press the `Test workflow` button to run this once, and bulk import of all your e-mail

### IMPORTANT
Specify your Gmail account creation date by editing the code node"
      },
      "typeVersion": 1
    },
    {
      "id": "2b85d362-d40d-49c0-b3a7-27fdbee8e90b",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        360,
        -100
      ],
      "parameters": {
        "width": 220,
        "height": 300,
        "content": "## Edit this ⬇️
And specify your Gmail account creation date"
      },
      "typeVersion": 1
    },
    {
      "id": "05a9dd25-ae36-4e3c-a249-787ee1047bff",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        740,
        260
      ],
      "parameters": {
        "color": 4,
        "width": 640,
        "height": 180,
        "content": "## Activate the workflow
And this trigger will check for new mail, every minute"
      },
      "typeVersion": 1
    },
    {
      "id": "3f19abc5-a165-49e8-b97e-233c47949e68",
      "name": "Set before and after dates",
      "type": "n8n-nodes-base.set",
      "position": [
        1040,
        -320
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "48e8d703-e52a-46cc-bd72-9b0d3352091b",
              "name": "after",
              "type": "string",
              "value": "={{ $json.after }}"
            },
            {
              "id": "a515cf56-9bc6-4724-a0ef-01a6159606f7",
              "name": "before",
              "type": "string",
              "value": "={{ DateTime.fromISO($json.after).plus(1, 'week').toISODate() }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "af742b17-2086-4698-af3a-32cb7260f380",
      "name": "Extract email fields",
      "type": "n8n-nodes-base.set",
      "position": [
        1480,
        -320
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "f818bad8-b000-499c-b137-de22dff4a343",
              "name": "email_text",
              "type": "string",
              "value": "={{ $json.text }}"
            },
            {
              "id": "68c16520-4a26-4ea9-95f7-ee89b9f53c4f",
              "name": "email_from",
              "type": "string",
              "value": "={{ $json.from?.text ?? '' }}"
            },
            {
              "id": "981f1f5b-ba2f-4153-966c-45bb6b535794",
              "name": "email_to",
              "type": "string",
              "value": "={{ $json.to?.text ?? '' }}"
            },
            {
              "id": "b528dd23-a743-4a55-98df-e1ae823b29b3",
              "name": "date",
              "type": "string",
              "value": "={{ DateTime.fromISO($json.date).toISO() }}"
            },
            {
              "id": "39081032-e503-470b-8d83-b5064238d037",
              "name": "email_id",
              "type": "string",
              "value": "={{ $json.id }}"
            },
            {
              "id": "146e8e72-3c2c-4320-b93a-b109d2e46139",
              "name": "thread_id",
              "type": "string",
              "value": "={{ $json.threadId }}"
            },
            {
              "id": "a49333a5-c565-4d46-8398-d423072b1e4d",
              "name": "email_subject",
              "type": "string",
              "value": "={{ $json.subject }}"
            },
            {
              "id": "806cf930-450e-4221-8061-a71ec8bf9bbe",
              "name": "attachments",
              "type": "array",
              "value": "={{ Object.keys($binary).map(item => $binary[item].fileName).filter(item => !!item) }}"
            },
            {
              "id": "30a38aaf-04c2-4286-99c9-8bb60ae8b317",
              "name": "email_cc",
              "type": "string",
              "value": "={{ $json.cc?.text ?? ''}}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "a51f5d5f-69c7-4153-be7f-492a8694629a",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1640,
        -540
      ],
      "parameters": {
        "width": 720,
        "height": 780,
        "content": "## Magic here 🪄
#### (not really, just statistics)
E-mail is stored in a `emails_metadata` structured table, and also fed to the [`nomic-embed-text`](https://ollama.com/library/nomic-embed-text) model to be stored in a `emails_embeddings` table as [vector embeddings](https://www.pinecone.io/learn/vector-embeddings/) so similarity searches are possible.

The `email_id` field can be used to make the relation between the structured records and the vector embeddings, as it's stored in their metadata as `emails_metadata.id`.
This is also the case for `thread_id`."
      },
      "typeVersion": 1
    },
    {
      "id": "809e9269-1275-4c87-8c7f-1840c76f5b22",
      "name": "Store structured",
      "type": "n8n-nodes-base.postgres",
      "onError": "continueErrorOutput",
      "position": [
        1700,
        -320
      ],
      "parameters": {
        "table": {
          "__rl": true,
          "mode": "name",
          "value": "emails_metadata"
        },
        "schema": {
          "__rl": true,
          "mode": "list",
          "value": "public"
        },
        "columns": {
          "value": {},
          "schema": [
            {
              "id": "email_id",
              "type": "string",
              "display": true,
              "removed": false,
              "required": true,
              "displayName": "email_id",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "thread_id",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "thread_id",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "email_from",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "email_from",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "email_to",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "email_to",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "email_cc",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "email_cc",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "date",
              "type": "dateTime",
              "display": true,
              "removed": false,
              "required": true,
              "displayName": "date",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "email_subject",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "email_subject",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "attachments",
              "type": "array",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "attachments",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            },
            {
              "id": "email_text",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "email_text",
              "defaultMatch": false,
              "canBeUsedToMatch": false
            }
          ],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [
            "email_id"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {
          "outputColumns": [
            "*"
          ]
        },
        "operation": "upsert"
      },
      "credentials": {},
      "typeVersion": 2.6
    },
    {
      "id": "1c3dca79-381c-411b-8727-baa297e1ceda",
      "name": "Store vectorized",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePGVector",
      "onError": "continueRegularOutput",
      "position": [
        1936,
        -320
      ],
      "parameters": {
        "mode": "insert",
        "options": {},
        "tableName": "emails_embeddings"
      },
      "credentials": {},
      "typeVersion": 1.1
    },
    {
      "id": "3b7e13b2-73e9-42e7-900c-59611fe5af32",
      "name": "Create the table",
      "type": "n8n-nodes-base.postgres",
      "position": [
        200,
        40
      ],
      "parameters": {
        "query": "CREATE TABLE IF NOT EXISTS public.emails_metadata (
    email_id character varying(64) NOT NULL,
    thread_id character varying(64),
    email_from text,
    email_to text,
    email_cc text,
    date timestamp with time zone NOT NULL,
    email_subject text,
    email_text text,
    attachments text[]
);
",
        "options": {},
        "operation": "executeQuery"
      },
      "credentials": {},
      "typeVersion": 2.6
    },
    {
      "id": "19c55312-d1da-4d1e-8637-c5b08a9c1a2d",
      "name": "Explode interval into weeks",
      "type": "n8n-nodes-base.code",
      "position": [
        420,
        40
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "// Edit this
let whenDidICreateMyGmailAccount = DateTime.fromISO('2013-11-01')

// (don't edit further down)
whenDidICreateMyGmailAccount = whenDidICreateMyGmailAccount.set({day: 1})
let now = $now.set({day: 1})
const weeks = []
while (Math.floor(Interval.fromDateTimes(whenDidICreateMyGmailAccount, now).length('weeks')) > -1) {
  weeks.push(now.toISODate())
  now = now.minus({weeks: 1})
}

return {json: { weeks }};"
      },
      "typeVersion": 2
    },
    {
      "id": "aed43a77-6d58-41ba-b0b0-fdd3e9fe777a",
      "name": "Get a batch of messages",
      "type": "n8n-nodes-base.gmail",
      "position": [
        1260,
        -320
      ],
      "webhookId": "bace3678-df5b-4a9c-a1ef-1c219e3fd07b",
      "parameters": {
        "simple": false,
        "filters": {
          "receivedAfter": "={{ $json.after }}",
          "receivedBefore": "={{ $json.before }}"
        },
        "options": {
          "downloadAttachments": true
        },
        "operation": "getAll",
        "returnAll": true
      },
      "credentials": {},
      "typeVersion": 2.1
    }
  ],
  "active": false,
  "pinData": {
    "Manual Trigger": [
      {
        "json": {}
      }
    ]
  },
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "3c337d42-a3bb-4b71-ac36-deaf0cdf6019",
  "connections": {
    "Split Out": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Gmail Trigger": {
      "main": [
        [
          {
            "node": "Extract email fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Manual Trigger": {
      "main": [
        [
          {
            "node": "Create the table",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Set before and after dates",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create the table": {
      "main": [
        [
          {
            "node": "Explode interval into weeks",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Store structured": {
      "main": [
        [
          {
            "node": "Store vectorized",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Store vectorized": {
      "main": [
        [
          {
            "node": "Was manually triggered?",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    },
    "Embeddings Ollama": {
      "ai_embedding": [
        [
          {
            "node": "Store vectorized",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Default Data Loader": {
      "ai_document": [
        [
          {
            "node": "Store vectorized",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Extract email fields": {
      "main": [
        [
          {
            "node": "Store structured",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get a batch of messages": {
      "main": [
        [
          {
            "node": "Extract email fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Was manually triggered?": {
      "main": [
        [
          {
            "node": "No Operation, do nothing",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set before and after dates": {
      "main": [
        [
          {
            "node": "Get a batch of messages",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Explode interval into weeks": {
      "main": [
        [
          {
            "node": "Split Out",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Recursive Character Text Splitter": {
      "ai_textSplitter": [
        [
          {
            "node": "Default Data Loader",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    }
  }
}

功能特点

  • 自动检测新邮件
  • AI智能内容分析
  • 自定义分类规则
  • 批量处理能力
  • 详细的处理日志

技术分析

节点类型及作用

  • @N8N/N8N Nodes Langchain.Embeddingsollama
  • @N8N/N8N Nodes Langchain.Documentdefaultdataloader
  • @N8N/N8N Nodes Langchain.Textsplitterrecursivecharactertextsplitter
  • Gmailtrigger
  • Splitinbatches

复杂度评估

配置难度:
★★★★☆
维护难度:
★★☆☆☆
扩展性:
★★★★☆

实施指南

前置条件

  • 有效的Gmail账户
  • n8n平台访问权限
  • Google API凭证
  • AI分类服务订阅

配置步骤

  1. 在n8n中导入工作流JSON文件
  2. 配置Gmail节点的认证信息
  3. 设置AI分类器的API密钥
  4. 自定义分类规则和标签映射
  5. 测试工作流执行
  6. 配置定时触发器(可选)

关键参数

参数名称 默认值 说明
maxEmails 50 单次处理的最大邮件数量
confidenceThreshold 0.8 分类置信度阈值
autoLabel true 是否自动添加标签

最佳实践

优化建议

  • 定期更新AI分类模型以提高准确性
  • 根据邮件量调整处理批次大小
  • 设置合理的分类置信度阈值
  • 定期清理过期的分类规则

安全注意事项

  • 妥善保管API密钥和认证信息
  • 限制工作流的访问权限
  • 定期审查处理日志
  • 启用双因素认证保护Gmail账户

性能优化

  • 使用增量处理减少重复工作
  • 缓存频繁访问的数据
  • 并行处理多个邮件分类任务
  • 监控系统资源使用情况

故障排除

常见问题

邮件未被正确分类

检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。

Gmail认证失败

确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。

调试技巧

  • 启用详细日志记录查看每个步骤的执行情况
  • 使用测试邮件验证分类逻辑
  • 检查网络连接和API服务状态
  • 逐步执行工作流定位问题节点

错误处理

工作流包含以下错误处理机制:

  • 网络超时自动重试(最多3次)
  • API错误记录和告警
  • 处理失败邮件的隔离机制
  • 异常情况下的回滚操作